\(~\) \(~\) \(~\)
Load the required packages:
library(dplyr)
library(tidyverse)
library(readxl)
library(ggplot2)
# Strongly prefer fixed notation over scientific notation when numbers are
# printed or used as axis/legend labels.
options(scipen = 999)
# Install gganimate only when it is missing, so that re-running the script
# does not download and reinstall the package every time.
if (!requireNamespace("gganimate", quietly = TRUE)) {
  install.packages("gganimate")
}
\(~\)
# Income per person: read the wide file, turn every column except `country`
# into a (year, income_person) pair, and store the year as an integer. The
# integer conversion is done while pivoting via `names_transform`.
gdp <- "data/income_per_person_gdppercapita_ppp_inflation_adjusted.csv" %>%
  read_csv() %>%
  pivot_longer(
    cols = -country,
    names_to = "year",
    names_transform = list(year = as.integer),
    values_to = "income_person"
  )
gdp
## # A tibble: 46,513 × 3
## country year income_person
## <chr> <int> <dbl>
## 1 Afghanistan 1800 603
## 2 Afghanistan 1801 603
## 3 Afghanistan 1802 603
## 4 Afghanistan 1803 603
## 5 Afghanistan 1804 603
## 6 Afghanistan 1805 603
## 7 Afghanistan 1806 603
## 8 Afghanistan 1807 603
## 9 Afghanistan 1808 603
## 10 Afghanistan 1809 603
## # … with 46,503 more rows
# Life expectancy: reshape the wide file so that every column except `country`
# becomes a (year, life_exp) pair, then convert the year from text to integer.
life_expectancy <- "data/life_expectancy_years.csv" %>%
  read_csv() %>%
  pivot_longer(
    cols = -country,
    names_to = "year",
    values_to = "life_exp"
  ) %>%
  mutate(across(year, as.integer))
life_expectancy
## # A tibble: 40,953 × 3
## country year life_exp
## <chr> <int> <dbl>
## 1 Afghanistan 1800 28.2
## 2 Afghanistan 1801 28.2
## 3 Afghanistan 1802 28.2
## 4 Afghanistan 1803 28.2
## 5 Afghanistan 1804 28.2
## 6 Afghanistan 1805 28.2
## 7 Afghanistan 1806 28.1
## 8 Afghanistan 1807 28.1
## 9 Afghanistan 1808 28.1
## 10 Afghanistan 1809 28.1
## # … with 40,943 more rows
# Total population: same long format as the other indicators. The year is
# stored as an integer and the population as a double.
population <- "data/population_total.csv" %>%
  read_csv() %>%
  pivot_longer(
    cols = -country,
    names_to = "year",
    values_to = "population"
  ) %>%
  mutate(
    year = as.integer(year),
    population = as.double(population)
  )
population
## # A tibble: 58,695 × 3
## country year population
## <chr> <int> <dbl>
## 1 Afghanistan 1800 3280000
## 2 Afghanistan 1801 3280000
## 3 Afghanistan 1802 3280000
## 4 Afghanistan 1803 3280000
## 5 Afghanistan 1804 3280000
## 6 Afghanistan 1805 3280000
## 7 Afghanistan 1806 3280000
## 8 Afghanistan 1807 3280000
## 9 Afghanistan 1808 3280000
## 10 Afghanistan 1809 3280000
## # … with 58,685 more rows
# Country -> region lookup: keep only the country name and its four-region
# label, renaming `name` to `country` so it matches the key used by the
# indicator tables.
geo_location <- "data/Data Geographies - v1 - by Gapminder.xlsx" %>%
  read_excel(sheet = "list-of-countries-etc") %>%
  select(country = "name", "four_regions")
geo_location
## # A tibble: 197 × 2
## country four_regions
## <chr> <chr>
## 1 Afghanistan asia
## 2 Albania europe
## 3 Algeria africa
## 4 Andorra europe
## 5 Angola africa
## 6 Antigua and Barbuda americas
## 7 Argentina americas
## 8 Armenia europe
## 9 Australia asia
## 10 Austria europe
## # … with 187 more rows
\(~\) \(~\) \(~\)
# Combine everything into one table. Start from the region lookup and attach
# income (by country), then life expectancy and population (by country and
# year). Left joins keep every country listed in `geo_location`.
table1 <- local({
  country_year <- c("country", "year")
  with_income <- left_join(geo_location, gdp, by = "country")
  with_life_exp <- left_join(with_income, life_expectancy, by = country_year)
  left_join(with_life_exp, population, by = country_year)
})
\(~\) \(~\) \(~\)
# Draw the income vs. life expectancy bubble chart for a single year.
#
# data:        data frame with the columns `year`, `income_person`,
#              `life_exp`, `four_regions` and `population`.
# target_year: the year to plot; it is also used as the plot subtitle.
#
# Returns a ggplot object.
plot_gapminder_year <- function(data, target_year) {
  data %>%
    # filter() has no `na.rm` argument, so rows with missing values are
    # removed with explicit conditions instead.
    filter(
      year == as.integer(target_year),
      !is.na(income_person),
      !is.na(life_exp),
      !is.na(population)
    ) %>%
    ggplot(aes(
      x = income_person,
      y = life_exp,
      colour = four_regions,
      size = population
    )) +
    geom_point() +
    labs(
      title = "Life expectancy",
      subtitle = as.character(target_year),
      caption = "Source: Gapminder",
      y = "Life expectancy",
      x = "Income",
      color = "Continent",
      size = "Population"
    ) +
    # Fixed limits keep the axes identical across years so the plots can be
    # compared. Points outside the limits are dropped by ggplot2 with a
    # warning. `minor_breaks = NULL` removes the minor grid lines.
    scale_y_continuous(
      limits = c(25, 75),
      breaks = c(25, 50, 75),
      minor_breaks = NULL
    ) +
    # `trans = "log10"` puts the x axis on a logarithmic scale, so every
    # tenfold increase in income (400 -> 4000 -> 40000) takes the same width.
    scale_x_continuous(
      limits = c(400, 40000),
      breaks = c(400, 4000, 40000),
      minor_breaks = NULL,
      trans = "log10"
    )
}

table1810 <- plot_gapminder_year(table1, 1810)
table1810
table1996 <- plot_gapminder_year(table1, 1996)
table1996
table2009 <- plot_gapminder_year(table1, 2009)
table2009
\(~\) \(~\)
library(gganimate)
library(scales)
# Base plot for the animation, built from all years at once.
# Rows without a year (countries in the region lookup that found no match in
# the left join) cannot be placed on the timeline, so they are removed first.
# No `label` aesthetic is mapped: geom_point() does not draw labels, and the
# title text is controlled by the string passed to labs() further down.
g <- table1 %>%
  filter(!is.na(year)) %>%
  ggplot(aes(
    x = income_person,
    y = life_exp,
    colour = four_regions,
    size = population
  )) +
  # Semi-transparent points so overlapping countries stay visible; the legend
  # is shown explicitly.
  geom_point(show.legend = TRUE, alpha = 0.17) +
  scale_color_viridis_d() +
  scale_size(range = c(2, 15)) +
  labs(
    caption = "Source: Gapminder",
    y = "Life expectancy",
    x = "Income",
    color = "Continent",
    size = "Population"
  ) +
  scale_y_continuous(
    limits = c(25, 75),
    breaks = c(25, 50, 75),
    minor_breaks = NULL
  ) +
  # Logarithmic x axis: each tenfold increase in income takes the same width.
  scale_x_continuous(
    limits = c(400, 40000),
    breaks = c(400, 4000, 40000),
    minor_breaks = NULL,
    trans = "log10"
  )
# transition_time(year) animates the plot over `year`; `{frame_time}` in the
# title is replaced by the time value of the frame being drawn.
g + transition_time(year) +
  labs(title = "Year: {frame_time}")